library(pacman)
p_load(
  here, fst, data.table, kableExtra, purrr, stringr, collapse
)

# Loading data ----------------------------------------------------------------
  # Plant info table
  plant_info_dt = 
    read.fst(
      path = here("Data/electricity-generation/plant-info-dt-oge.fst"),
      as.data.table = TRUE
    )[year == 2019,.SD[which.min(year)], by= plant_id_eia] |>
    setkey(plant_id_eia, year)
  # Filepaths of all the plants with fitted models 
  plant_gen_fp = list.files(
    here('Data/electricity-generation/plant-model-fit/plant-gen-dt'),
    full.names = TRUE
  )
  # Avg emissions rates 
  avg_fc_emissions_dt = fread(
    here('Data/electricity-generation/output/emissions-rate-avg-fc-oge.csv')
  )
# Function to summarize elec-gen for a particular region
# This fitted data with just plants we have models for
summarize_plant_gen_dt_nerc = function(nerc_adj_in){
  print(nerc_adj_in)
  # Getting file paths for all plants in region
  nerc_plant_gen_fp = 
    plant_gen_fp[
      str_extract(plant_gen_fp, '(?<=-)\\d*(?=\\.fst)') |> as.integer()
      %in% plant_info_dt[nerc_adj == nerc_adj_in]$plant_id_eia
    ]
  # Summarizing generation by time, ic, nerc, fuel cat
  elec_gen_dt = 
    map_dfr(
      nerc_plant_gen_fp, 
      read.fst,
      as.data.table = TRUE
    ) |>
    join(
      plant_info_dt, 
      on = 'plant_id_eia'
    ) |>
    join(
      avg_fc_emissions_dt ,
      on = 'fuel_category'
    )
  # Fixing the bad plants
  elec_gen_dt[,':='(
    nox_mass_lb_for_electricity = fifelse(
      plant_id_eia %in% c(2221, 2528, 2503, 50626, 50931), 
      net_generation_mwh*nox_tons_per_mwh_p50_fc,
      nox_mass_lb_for_electricity
    ),
    so2_mass_lb_for_electricity = fifelse(
      plant_id_eia %in% c(2221, 2528, 2503, 50626, 50931), 
      net_generation_mwh*so2_tons_per_mwh_p50_fc,
      so2_mass_lb_for_electricity
    ),
    co2e_mass_lb_for_electricity = fifelse(
      plant_id_eia %in% c(2221, 2528, 2503, 50626, 50931), 
      net_generation_mwh*co2e_tons_per_mwh_p50_fc,
      co2e_mass_lb_for_electricity
    )
  )]
  plant_gen_dt = 
    elec_gen_dt[,.(
      tot_net_generation_mwh = sum(net_generation_mwh_raw),
      tot_fit_net_generation_mwh = sum(fit_net_generation_mwh),
      nox_lb = sum(nox_mass_lb_for_electricity),
      so2_lb = sum(so2_mass_lb_for_electricity),
      co2e_lb = sum(co2e_mass_lb_for_electricity),
      num_hours = .N,
      num_plants = uniqueN(plant_id_eia)), 
      keyby = .(nerc_adj, fuel_category)
    ]
  return(plant_gen_dt)
}
# Function to summarize elec-gen for a particular region
# This is the raw data with all plants
summarize_elec_gen_dt_nerc = function(nerc_adj_in){
  # Summarizing generation by time, ic, nerc, fuel cat
  elec_gen_dt = 
    read.fst(
      path = here(
        "Data/electricity-generation/elec-gen-dt",
        paste0("elec-gen-dt-",tolower(nerc_adj_in),".fst")
      ), 
      as.data.table = TRUE
    )[,.(
        tot_net_generation_mwh = sum(net_generation_mwh_raw),
        nox_lb = sum(nox_mass_lb_for_electricity),
        so2_lb = sum(so2_mass_lb_for_electricity),
        co2e_lb = sum(co2e_mass_lb_for_electricity),
        num_hours = .N,
        num_plants = uniqueN(plant_id_eia)
      ), 
      keyby = .(nerc_adj, fuel_category)
    ]
  return(elec_gen_dt)
}
  # Running for all regions 
  region_gen_dt = 
    map_dfr(
      c('CAL','MRO','NPCC','RFC','SERC','TRE','WECC'),
      summarize_plant_gen_dt_nerc
    )

# Summary statistics ----------------------------------------------------------
  # First by region 
  region_dt = 
    rbind(
      region_gen_dt[,.(
          n_plants = sum(num_plants), 
          net_gen_twh = round(sum(tot_net_generation_mwh)/1e5),
          perc_gen_ff = round(100*sum(
              ifelse(
                fuel_category %in% c('coal','natural_gas','petroleum'),
                tot_net_generation_mwh,0))/
            sum(tot_net_generation_mwh), digits= 1),
          nox_emissions_lb_mwh = round(sum(nox_lb)/sum(tot_net_generation_mwh), digits = 2),
          so2_emissions_lb_mwh = round(sum(so2_lb)/sum(tot_net_generation_mwh), digits = 2),
          co2e_emissions_lb_mwh = round(sum(co2e_lb)/sum(tot_net_generation_mwh))
        ), 
        keyby = .(Region = nerc_adj)
      ],
      region_gen_dt[,.(
          n_plants = sum(num_plants), 
          net_gen_twh = round(sum(tot_net_generation_mwh)/1e5),
          perc_gen_ff = round(100*sum(
              ifelse(
                fuel_category %in% c('coal','natural_gas','petroleum'),
                tot_net_generation_mwh,0))/
            sum(tot_net_generation_mwh), digits= 1),
          nox_emissions_lb_mwh = round(sum(nox_lb)/sum(tot_net_generation_mwh), digits = 2),
          so2_emissions_lb_mwh = round(sum(so2_lb)/sum(tot_net_generation_mwh), digits = 2),
          co2e_emissions_lb_mwh = round(sum(co2e_lb)/sum(tot_net_generation_mwh))
        ), 
        keyby = .(Region = rep('Total', nrow(region_gen_dt)))
      ]
    )
  # Region level summary table 
  region_dt |>
    kbl(
      caption = 'Summary statistics on power plants by region',
      format.args = list(big.mark = ",",scientific = FALSE),
      format = 'latex',
      label = 'region-plant-summary',
      booktabs = TRUE,
      col.names = c(
        'Region',
        'Number of plants',
        'Total (TWH)',
        '% Fossil Fuel',
        'NOx',
        'SO2',
        'CO2e'
      ),
      linesep = ''
    ) |>
    add_header_above(c(' ' = 2, 'Net Generation' = 2,'Emissions (lb/MWh)' = 3)) |>
    row_spec(c(8), bold = T)|>
    write(file = here('Draft/Figures/region-plant-summary.tex'))
  # Now by fuel type
  fuel_dt = 
    rbind(
      region_gen_dt[,.(
          n_plants = sum(num_plants), 
          net_gen_twh = round(sum(tot_net_generation_mwh)/1e5),
          nox_emissions_lb_mwh = round(sum(nox_lb)/sum(tot_net_generation_mwh), digits = 2),
          so2_emissions_lb_mwh = round(sum(so2_lb)/sum(tot_net_generation_mwh), digits = 2),
          co2e_emissions_lb_mwh = round(sum(co2e_lb)/sum(tot_net_generation_mwh))
        ), 
        keyby = .(
          fuel_category = ifelse(
            fuel_category %in% c('coal','natural_gas','petroleum','biomass', 'nuclear','solar'), fuel_category,
            'zother'
          ),
          fuel_category_name = fcase(
            fuel_category == 'biomass', 'Biomass',
            fuel_category == 'coal', 'Coal',
            fuel_category == 'geothermal', 'Geothermal',
            fuel_category == 'hydro', 'Hydro',
            fuel_category == 'natural_gas', 'Natural Gas',
            fuel_category == 'nuclear', 'Nuclear',
            fuel_category == 'petroleum', 'Petroleum',
            #fuel_category == 'solar','Solar',
            fuel_category == 'waste', 'Waste',
            #fuel_category == 'wind', 'Wind',
            default = 'Other'
          )
        )
      ],
      region_gen_dt[,.(
          n_plants = sum(num_plants), 
          net_gen_twh = round(sum(tot_net_generation_mwh)/1e5),
          nox_emissions_lb_mwh = round(sum(nox_lb)/sum(tot_net_generation_mwh), digits = 2),
          so2_emissions_lb_mwh = round(sum(so2_lb)/sum(tot_net_generation_mwh), digits = 2),
          co2e_emissions_lb_mwh = round(sum(co2e_lb)/sum(tot_net_generation_mwh))
        ), 
        keyby = .(
          fuel_category = rep('Total', nrow(region_gen_dt)),
          fuel_category_name = rep('Total', nrow(region_gen_dt))
        )
      ]
    )
    
  # Fuel category summary table 
  fuel_dt[,-'fuel_category'] |>
    kbl(
      caption = 'Summary statistics on power plants by fuel category',
      format.args = list(big.mark = ",",scientific = FALSE),
      format = 'latex',
      label = 'fuel-plant-summary',
      booktabs = TRUE,
      col.names = c(
        'Fuel',
        'Number of plants',
        'Net generation (TWH)',
        'NOx',
        'SO2',
        'CO2e'
      ),
      linesep = ''
    ) |>
    add_header_above(c(' ' = 3,'Emissions (lb/MWh)' = 3)) |>
    row_spec(c(10), bold = T)|>
    write(file = here('Draft/Figures/fuel-plant-summary.tex'))

